Load the data and setup properly
The table to analyze
datatable(f_all,
filter = "top",
class = "cell-border stripe",
extensions = c("AutoFill",
#"Buttons",
"ColReorder",
"KeyTable",
"Responsive",
"RowReorder"),
options = list(pageLength = 20,
lengthMenu = c(5, 10, 15, 20, 25, 50, 100, 250, 500, 1000),
order = list(list(7, "desc")),
autofill = TRUE,
#dom = "Bfrtip",
#buttons = c("copy", "csv", "excel", "pdf", "print"),
colReorder = TRUE,
keys = TRUE,
rowReorder = TRUE)
) %>% formatStyle(c("Raw_Gain_M", "Raw_Cover_M", "Raw_Freq_M"),
background = styleColorBar(c(0, f_all$Raw_Gain_M[order(f_all$Raw_Gain_M, decreasing = TRUE)[2]]), 'lightgreen'),
backgroundSize = '100% 90%',
backgroundRepeat = 'no-repeat',
backgroundPosition = 'center') %>%
formatStyle(c("TS_Gain_M", "TS_Cover_M", "TS_Freq_M"),
background = styleColorBar(c(0, f_all$TS_Gain_M[order(f_all$TS_Gain_M, decreasing = TRUE)[2]]), 'pink'),
backgroundSize = '100% 90%',
backgroundRepeat = 'no-repeat',
backgroundPosition = 'center') %>%
formatStyle(c("Raw_Count", "TS_Count"),
background = styleColorBar(c(0, 5), 'lightgrey'),
backgroundSize = '100% 90%',
backgroundRepeat = 'no-repeat',
backgroundPosition = 'center') %>%
formatPercentage(columns = c("Raw_Gain_M", "TS_Gain_M", "Raw_Cover_M", "TS_Cover_M", "Raw_Freq_M", "TS_Freq_M"),
digits = 4)
Some plots
plotting <- ggplot(as.data.frame(f_all)[1:(nrow(f_all) - 2), ], aes(x = Raw_Gain_M, y = TS_Gain_M, color = Raw_Rank, color2 = TS_Rank)) + labs(title = "TS Gain vs Raw Gain, Rank Multivariate", x = "Raw Gain", y = "Time-Series Gain") + theme_bw() + geom_rug() + scale_color_colorplane(axis_title = "Raw Gain Rank", axis_title_y = "TS Gain Rank") + theme(plot.title = element_text(size = rel(2), face = "bold"), axis.title.x = element_text(size = rel(2)), axis.title.y = element_text(size = rel(2)), axis.text.x = element_text(size = rel(2)), axis.text.y = element_text(size = rel(2))) + geom_point_interactive(aes(tooltip = Feature), size = 2)
plotting <- ggplot(as.data.frame(f_all)[1:(nrow(f_all) - 2), ], aes(x = Raw_Cover_M, y = TS_Cover_M, color = Raw_Rank, color2 = TS_Rank)) + labs(title = "TS Cover vs Raw Cover, Rank Multivariate", x = "Raw Cover", y = "Time-Series Cover") + theme_bw() + geom_rug() + scale_color_colorplane(axis_title = "Raw Gain Rank", axis_title_y = "TS Gain Rank") + theme(plot.title = element_text(size = rel(2), face = "bold"), axis.title.x = element_text(size = rel(2)), axis.title.y = element_text(size = rel(2)), axis.text.x = element_text(size = rel(2)), axis.text.y = element_text(size = rel(2))) + geom_point_interactive(aes(tooltip = Feature), size = 2)
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)
plotting <- ggplot(as.data.frame(f_all)[1:(nrow(f_all) - 2), ], aes(x = Raw_Freq_M, y = TS_Freq_M, color = Raw_Rank, color2 = TS_Rank)) + labs(title = "TS Freq vs Raw Freq, Rank Multivariate", x = "Raw Freq", y = "Time-Series Freq") + theme_bw() + geom_rug() + scale_color_colorplane(axis_title = "Raw Gain Rank", axis_title_y = "TS Gain Rank") + theme(plot.title = element_text(size = rel(2), face = "bold"), axis.title.x = element_text(size = rel(2)), axis.title.y = element_text(size = rel(2)), axis.text.x = element_text(size = rel(2)), axis.text.y = element_text(size = rel(2))) + geom_point_interactive(aes(tooltip = Feature), size = 2)
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)
plotting <- ggplot(as.data.frame(f_all)[1:(nrow(f_all) - 2), ], aes(x = Raw_Rank, y = TS_Rank, color = Raw_Gain_M, color2 = TS_Gain_M)) + labs(title = "TS Gain Rank vs Raw Gain Rank, Non-Rank Multivariate", x = "Raw Gain Rank", y = "Time-Series Gain Rank") + theme_bw() + geom_rug() + scale_color_colorplane(axis_title = "Raw Gain", axis_title_y = "TS Gain") + theme(plot.title = element_text(size = rel(2), face = "bold"), axis.title.x = element_text(size = rel(2)), axis.title.y = element_text(size = rel(2)), axis.text.x = element_text(size = rel(2)), axis.text.y = element_text(size = rel(2))) + geom_point_interactive(aes(tooltip = Feature), size = 2)
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)
plotting <- ggplot(as.data.frame(f_all)[1:(nrow(f_all) - 2), ], aes(x = Raw_Rank, y = TS_Rank, color = Raw_Cover_M, color2 = TS_Cover_M)) + labs(title = "TS Gain Rank vs Raw Gain Rank, Cover Multivariate", x = "Raw Gain Rank", y = "Time-Series Gain Rank") + theme_bw() + geom_rug() + scale_color_colorplane(axis_title = "Raw Cover", axis_title_y = "TS Cover") + theme(plot.title = element_text(size = rel(2), face = "bold"), axis.title.x = element_text(size = rel(2)), axis.title.y = element_text(size = rel(2)), axis.text.x = element_text(size = rel(2)), axis.text.y = element_text(size = rel(2))) + geom_point_interactive(aes(tooltip = Feature), size = 2)
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)
plotting <- ggplot(as.data.frame(f_all)[1:(nrow(f_all) - 2), ], aes(x = Raw_Rank, y = TS_Rank, color = Raw_Freq_M, color2 = TS_Freq_M)) + labs(title = "TS Gain Rank vs Raw Gain Rank, Freq Multivariate", x = "Raw Gain Rank", y = "Time-Series Gain Rank") + theme_bw() + geom_rug() + scale_color_colorplane(axis_title = "Raw Freq", axis_title_y = "TS Freq") + theme(plot.title = element_text(size = rel(2), face = "bold"), axis.title.x = element_text(size = rel(2)), axis.title.y = element_text(size = rel(2)), axis.text.x = element_text(size = rel(2)), axis.text.y = element_text(size = rel(2))) + geom_point_interactive(aes(tooltip = Feature), size = 2)
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)
---
title: "Compare Numeric Time Series"
output:
  html_document:
    css: discretization.css
  html_notebook:
    css: discretization.css
---

# Load the data and set it up properly

```{r}
library(data.table)
library(DT)
library(ggplot2)
library(colorplaner)
library(ggiraph)
# Root folder holding the per-fold importance files and the column list.
# Absolute paths are built from it, so the chunk does not need setwd().
data_root <- "E:/Laurae"

# Read the feature-importance CSV of one fold from `run_dir` (a folder under
# `data_root`) and tag every row with its fold number.
read_fold_importance <- function(fold, run_dir) {
  csv_path <- file.path(
    data_root, run_dir, paste0("best_features", fold, ".csv")
  )
  cbind(fread(csv_path, data.table = FALSE), Fold = fold)
}

f_fold <- list()  # not referenced by any chunk visible in this document
folds <- 1:5

# Stack the five folds of each model: f_imp1 = raw numeric features,
# f_imp2 = time-series features.
f_imp1 <- do.call(
  rbind,
  lapply(folds, read_fold_importance, run_dir = "20161017_test_numeric")
)
f_imp2 <- do.call(
  rbind,
  lapply(folds, read_fold_importance, run_dir = "20161017_test_ts")
)

importance_cols <- c("Gain", "Cover", "Frequency")

# ---- Raw model: per-feature mean, SD and number of folds ----
f_imp1_mean <- aggregate(
  f_imp1[, importance_cols], by = list(Feature = f_imp1$Feature), mean
)
colnames(f_imp1_mean) <- c("Feature", "Raw_Gain_M", "Raw_Cover_M", "Raw_Freq_M")
# Rank 1 = highest mean gain; tied features share the best rank.
f_imp1_mean$Raw_Rank <- (nrow(f_imp1_mean) + 1) -
  rank(f_imp1_mean$Raw_Gain_M, ties.method = "max")
f_imp1_sd <- aggregate(
  f_imp1[, importance_cols], by = list(Feature = f_imp1$Feature), sd
)
# sd() is NA for a feature that appears in a single fold; report 0 instead.
f_imp1_sd[is.na(f_imp1_sd)] <- 0
colnames(f_imp1_sd) <- c("Feature", "Raw_Gain_SD", "Raw_Cover_SD", "Raw_Freq_SD")
f_imp1_len <- aggregate(
  f_imp1[, c("Feature")], by = list(Feature = f_imp1$Feature), length
)
colnames(f_imp1_len) <- c("Feature", "Raw_Count")
f_imp1 <- cbind(f_imp1_mean, f_imp1_sd[, -1], Raw_Count = f_imp1_len[, -1])

# ---- Time-series model: same summary ----
f_imp2_mean <- aggregate(
  f_imp2[, importance_cols], by = list(Feature = f_imp2$Feature), mean
)
colnames(f_imp2_mean) <- c("Feature", "TS_Gain_M", "TS_Cover_M", "TS_Freq_M")
# Rank against the number of time-series features. Using the raw model's
# feature count here would shift every rank when the two counts differ.
f_imp2_mean$TS_Rank <- (nrow(f_imp2_mean) + 1) -
  rank(f_imp2_mean$TS_Gain_M, ties.method = "max")
f_imp2_sd <- aggregate(
  f_imp2[, importance_cols], by = list(Feature = f_imp2$Feature), sd
)
f_imp2_sd[is.na(f_imp2_sd)] <- 0
colnames(f_imp2_sd) <- c("Feature", "TS_Gain_SD", "TS_Cover_SD", "TS_Freq_SD")
f_imp2_len <- aggregate(
  f_imp2[, c("Feature")], by = list(Feature = f_imp2$Feature), length
)
colnames(f_imp2_len) <- c("Feature", "TS_Count")
f_imp2 <- cbind(f_imp2_mean, f_imp2_sd[, -1], TS_Count = f_imp2_len[, -1])

# ---- One row per candidate feature, both models side by side ----
used_columns <- fread(file.path(data_root, "TimeSeries", "used_columns.csv"))$x
f_all <- data.table(
  data.frame(Feature = c("LeakStart", "LeakEnd", used_columns))
)
f_all <- merge(f_all, f_imp1, by = "Feature", all.x = TRUE)
f_all <- merge(f_all, f_imp2, by = "Feature", all.x = TRUE)
# Features a model never reported get 0 in every statistic of that model,
# including its rank and fold count.
f_all[is.na(f_all)] <- 0

# Keep counts, ranks and means (raw next to time-series); the SD columns are
# dropped. Selecting by name keeps this correct if the merge layout changes.
f_all <- f_all[, c("Feature",
                   "Raw_Count", "TS_Count",
                   "Raw_Rank", "TS_Rank",
                   "Raw_Gain_M", "TS_Gain_M",
                   "Raw_Cover_M", "TS_Cover_M",
                   "Raw_Freq_M", "TS_Freq_M"), with = FALSE]

```

# The table to analyze

```{r}
# Colour bars are scaled from 0 to the second-largest mean gain of each model,
# and that same cap is applied to the gain, cover and frequency columns.
raw_bar_cap <- f_all$Raw_Gain_M[order(f_all$Raw_Gain_M, decreasing = TRUE)[2]]
ts_bar_cap <- f_all$TS_Gain_M[order(f_all$TS_Gain_M, decreasing = TRUE)[2]]

# Extensions to load; "Buttons" is deliberately left out (and with it the
# dom = "Bfrtip" / buttons = c("copy", "csv", "excel", "pdf", "print") options).
table_extensions <- c(
  "AutoFill",
  "ColReorder",
  "KeyTable",
  "Responsive",
  "RowReorder"
)

# 20 rows per page, initially sorted on column index 7, descending.
table_options <- list(
  pageLength = 20,
  lengthMenu = c(5, 10, 15, 20, 25, 50, 100, 250, 500, 1000),
  order = list(list(7, "desc")),
  autofill = TRUE,
  colReorder = TRUE,
  keys = TRUE,
  rowReorder = TRUE
)

importance_table <- datatable(
  f_all,
  filter = "top",
  class = "cell-border stripe",
  extensions = table_extensions,
  options = table_options
)

# Green bars for the raw model's mean importances.
importance_table <- formatStyle(
  importance_table,
  c("Raw_Gain_M", "Raw_Cover_M", "Raw_Freq_M"),
  background = styleColorBar(c(0, raw_bar_cap), "lightgreen"),
  backgroundSize = "100% 90%",
  backgroundRepeat = "no-repeat",
  backgroundPosition = "center"
)

# Pink bars for the time-series model's mean importances.
importance_table <- formatStyle(
  importance_table,
  c("TS_Gain_M", "TS_Cover_M", "TS_Freq_M"),
  background = styleColorBar(c(0, ts_bar_cap), "pink"),
  backgroundSize = "100% 90%",
  backgroundRepeat = "no-repeat",
  backgroundPosition = "center"
)

# Grey bars for the fold counts, on a fixed 0-5 scale.
importance_table <- formatStyle(
  importance_table,
  c("Raw_Count", "TS_Count"),
  background = styleColorBar(c(0, 5), "lightgrey"),
  backgroundSize = "100% 90%",
  backgroundRepeat = "no-repeat",
  backgroundPosition = "center"
)

# Show the six mean-importance columns as percentages with 4 decimals.
importance_table <- formatPercentage(
  importance_table,
  columns = c("Raw_Gain_M", "TS_Gain_M", "Raw_Cover_M", "TS_Cover_M",
              "Raw_Freq_M", "TS_Freq_M"),
  digits = 4
)

importance_table
```

# Some plots

```{r, fig.height=14, fig.width=18}
# Every plot uses f_all without its last two rows. head(x, -2) is used instead
# of 1:(nrow - 2) so a table with two rows or fewer yields an empty frame
# rather than a bogus index.
# NOTE(review): presumably the dropped rows are the LeakEnd / LeakStart
# entries, which would sort last after the merge on Feature - confirm.
plot_data <- head(as.data.frame(f_all), -2)

# Build one raw-vs-time-series scatter plot.
#   data               data frame to plot
#   mapping            aes() with x, y, color and color2 (colorplaner's second
#                      colour dimension)
#   title, x_lab, y_lab       plot title and axis labels
#   legend_x, legend_y        axis titles of the two-dimensional colour legend
# Returns the ggplot object; points carry the feature name as tooltip.
comparison_plot <- function(data, mapping, title, x_lab, y_lab,
                            legend_x, legend_y) {
  double_size <- element_text(size = rel(2))
  ggplot(data, mapping) +
    labs(title = title, x = x_lab, y = y_lab) +
    theme_bw() +
    geom_rug() +
    scale_color_colorplane(axis_title = legend_x, axis_title_y = legend_y) +
    theme(
      plot.title = element_text(size = rel(2), face = "bold"),
      axis.title.x = double_size,
      axis.title.y = double_size,
      axis.text.x = double_size,
      axis.text.y = double_size
    ) +
    geom_point_interactive(aes(tooltip = Feature), size = 2)
}

# ---- Importance vs importance, coloured by the two gain ranks ----
plotting <- comparison_plot(
  plot_data,
  aes(x = Raw_Gain_M, y = TS_Gain_M, color = Raw_Rank, color2 = TS_Rank),
  title = "TS Gain vs Raw Gain, Rank Multivariate",
  x_lab = "Raw Gain", y_lab = "Time-Series Gain",
  legend_x = "Raw Gain Rank", legend_y = "TS Gain Rank"
)
# This render call was missing, so the gain plot was built and then overwritten
# without ever being displayed.
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)

plotting <- comparison_plot(
  plot_data,
  aes(x = Raw_Cover_M, y = TS_Cover_M, color = Raw_Rank, color2 = TS_Rank),
  title = "TS Cover vs Raw Cover, Rank Multivariate",
  x_lab = "Raw Cover", y_lab = "Time-Series Cover",
  legend_x = "Raw Gain Rank", legend_y = "TS Gain Rank"
)
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)

plotting <- comparison_plot(
  plot_data,
  aes(x = Raw_Freq_M, y = TS_Freq_M, color = Raw_Rank, color2 = TS_Rank),
  title = "TS Freq vs Raw Freq, Rank Multivariate",
  x_lab = "Raw Freq", y_lab = "Time-Series Freq",
  legend_x = "Raw Gain Rank", legend_y = "TS Gain Rank"
)
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)

# ---- Gain rank vs gain rank, coloured by each importance measure ----
plotting <- comparison_plot(
  plot_data,
  aes(x = Raw_Rank, y = TS_Rank, color = Raw_Gain_M, color2 = TS_Gain_M),
  title = "TS Gain Rank vs Raw Gain Rank, Non-Rank Multivariate",
  x_lab = "Raw Gain Rank", y_lab = "Time-Series Gain Rank",
  legend_x = "Raw Gain", legend_y = "TS Gain"
)
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)

plotting <- comparison_plot(
  plot_data,
  aes(x = Raw_Rank, y = TS_Rank, color = Raw_Cover_M, color2 = TS_Cover_M),
  title = "TS Gain Rank vs Raw Gain Rank, Cover Multivariate",
  x_lab = "Raw Gain Rank", y_lab = "Time-Series Gain Rank",
  legend_x = "Raw Cover", legend_y = "TS Cover"
)
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)

plotting <- comparison_plot(
  plot_data,
  aes(x = Raw_Rank, y = TS_Rank, color = Raw_Freq_M, color2 = TS_Freq_M),
  title = "TS Gain Rank vs Raw Gain Rank, Freq Multivariate",
  x_lab = "Raw Gain Rank", y_lab = "Time-Series Gain Rank",
  legend_x = "Raw Freq", legend_y = "TS Freq"
)
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)
```

